library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(patchwork)
library(ggplot2)
library(ggmap)
## Warning: 程辑包'ggmap'是用R版本4.3.2 来建造的
## ℹ Google's Terms of Service: <https://mapsplatform.google.com>
##   Stadia Maps' Terms of Service: <https://stadiamaps.com/terms-of-service/>
##   OpenStreetMap's Tile Usage Policy: <https://operations.osmfoundation.org/policies/tiles/>
## ℹ Please cite ggmap if you use it! Use `citation("ggmap")` for details.
library(plotly)
## 
## 载入程辑包:'plotly'
## 
## The following object is masked from 'package:ggmap':
## 
##     wind
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(mgcv)
## 载入需要的程辑包:nlme
## 
## 载入程辑包:'nlme'
## 
## The following object is masked from 'package:dplyr':
## 
##     collapse
## 
## This is mgcv 1.9-0. For overview type 'help("mgcv-package")'.
library(gganimate)
## Warning: 程辑包'gganimate'是用R版本4.3.2 来建造的
knitr::opts_chunk$set(
  echo = TRUE,
  fig.width = 6,
  fig.asp = 0.6,
  out.width = "90%"
)

theme_set(theme_minimal() + theme(legend.position = "bottom"))

options(
  ggplot2.continuous.colour = "viridis",
  ggplot2.continuous.fill = "viridis"
)

scale_colour_discrete = scale_color_viridis_d
scale_fill_discrete = scale_fill_viridis_d

Preliminary data import and cleaning:

rats_df = 
  read_csv("./data/rat_sightings.csv", ) %>% 
  janitor::clean_names() %>% 
  select(unique_key, created_date, location_type, incident_zip, city, borough, latitude, longitude, location) %>% 
  separate(created_date, into = c("created_date", "time", "am_pm"), sep = " ") %>% 
  separate(created_date, into = c("month", "day", "year"), sep = "/")
## Rows: 232417 Columns: 38
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (26): Created Date, Closed Date, Agency, Agency Name, Complaint Type, De...
## dbl  (5): Unique Key, X Coordinate (State Plane), Y Coordinate (State Plane)...
## lgl  (7): Vehicle Type, Taxi Company Borough, Taxi Pick Up Location, Bridge ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Making the density map

head(rats_df)
## # A tibble: 6 × 13
##   unique_key month day   year  time     am_pm location_type   incident_zip city 
##        <dbl> <chr> <chr> <chr> <chr>    <chr> <chr>           <chr>        <chr>
## 1   56242499 12    13    2022  06:52:10 PM    3+ Family Apt.… 10472        BRONX
## 2   56242808 12    13    2022  02:28:55 PM    Other (Explain… 10025        NEW …
## 3   57383018 04    21    2023  10:03:11 AM    3+ Family Apt.… 10026        NEW …
## 4   58545179 08    18    2023  03:37:48 AM    1-2 Family Dwe… 11222        BROO…
## 5   56610428 01    24    2023  11:45:24 PM    Other (Explain… 11204        BROO…
## 6   58788175 09    11    2023  11:52:46 PM    3+ Family Apt.… 11238        BROO…
## # ℹ 4 more variables: borough <chr>, latitude <dbl>, longitude <dbl>,
## #   location <chr>
register_stadiamaps(key = "7fe0e792-45a1-4318-8bea-cd0eb5c44b18")

nyc_map <- get_stadiamap(bbox = c(left = -74.26, bottom = 40.495, right = -73.7, top = 40.92),zoom=10)
## ℹ © Stadia Maps © Stamen Design © OpenMapTiles © OpenStreetMap contributors.
ggmap(nyc_map)

rats_covid=rats_df|>
  filter(month %in% "12", 
         year%in% "2019")|>
  filter(borough != "Unspecified")

ggmap(nyc_map)+
geom_point(aes(x=longitude,y=latitude,color=borough),alpha=0.3,data=rats_covid)+
  theme_minimal() +
  labs(title = "Population Density Map before Covid")
## Warning: Removed 12 rows containing missing values (`geom_point()`).

rats_march=rats_df|>
  filter(month %in% "01", 
         year%in% "2023")|>
  filter(borough != "Unspecified")

ggmap(nyc_map)+
geom_point(aes(x=longitude,y=latitude,color=borough),alpha=0.3,data=rats_march)+
  theme_minimal() +
  labs(title = "Population Density Map as of March 2023")
## Warning: Removed 4 rows containing missing values (`geom_point()`).

rats_november=rats_df|>
  filter(month %in% "11", 
         year%in% "2023")|>
  filter(borough != "Unspecified")

ggmap(nyc_map)+
geom_point(aes(x=longitude,y=latitude,color=borough),alpha=0.3,data=rats_november)+
  theme_minimal() +
  labs(title = "Population Density Map as of November 2023")
## Warning: Removed 12 rows containing missing values (`geom_point()`).

## Interpretation of the Graph >>The bar graph above illustrates the rat density before Covid, March 2023 and November 2023 over the boroughs in New York City. Between December, 2019 and March 2023, the graph shows significant difference in rat density especially in Manhattan. Between March and November 2023, the rat czar program was implemented on April 2023. These two graphs shows the comparison in density one month before the program started and the most recent density (as of November 2023). There are no significant change in density visible from the graph except it seems less dense in staten island.

rats_2023=rats_df|>
  filter(year %in% "2023")|>
  drop_na()|>
  ggplot(aes(x=longitude,y=latitude))+
  theme_minimal() +
  labs(title = "Population Density Map as of 2023")+
  geom_point(aes(color=borough,alpha=0.3))+
  facet_wrap(~ month, scales = "fixed", ncol = 6)
  
ggplotly(rats_2023)

A graph showing the density change throughout the year of 2023.While some density change is visible through the graph, it is not significant enough to show major changes made by the rat czar program. The rat problem still seems severe through out the entire new york city. The density change maybe explained by other factors such as weather change and rats’ breeding season.

rats_allyear=rats_df|>
  mutate(year=as.numeric(year))|>
  na.omit()|>
   filter(borough != "Unspecified")


rats_allyeargraph<-ggmap(nyc_map)+
  geom_point(aes(x=longitude,y=latitude,color=borough),alpha=0.3,data=rats_allyear)
  labs(title = "Population Density Map")+
  geom_point(aes(color=borough,alpha=0.3),na.rm=TRUE)
## NULL
map_with_animation <- rats_allyeargraph+
  transition_time(year) +
  ggtitle('Year: {frame_time}',
          subtitle = 'Frame {frame} of {nframes}')
num_years <-  max(rats_allyear$year) - min(rats_allyear$year) + 1
animate(map_with_animation, nframes = num_years, fp=1)

Grapj showing rat density change in the city of New York Over the years

Regression analyses

ANOVA test on location_type:

loc_df = 
  rats_df %>% 
    filter(year == "2018" | year == "2019" | year == "2020" | year == "2021" | year == "2022" | year == "2023") %>% 
    select(year, location_type) %>% 
    group_by(year, location_type) %>% 
    summarize(sightings = n()) %>% 
    arrange(desc(sightings)) %>% 
    filter(location_type == "3+ Family Apt. Building" | location_type == "1-2 Family Dwelling" | location_type == "Commercial Building" | location_type == "Construction Site")
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
loc_df %>% 
  ggplot(aes(x = year, y = sightings, fill = location_type)) + 
  geom_area(aes(group = location_type, alpha = 0.8)) +
  theme_bw() + 
  labs(title = "Rat sightings by location type 2018-2023")